// this file is ALWAYS called by another do-file

* * *   * * *   * * *   * * *   * * *   * * *
*   LOAD & PREPARE THE DATA   
* * *   * * *   * * *   * * *   * * *   * * *

// Build the 200K panel in its own frame.
// A frame left over from an earlier run in the same session is dropped first;
// without this, frame create aborts because the name is already in use.
// (capture: the drop is a no-op when the frame does not exist yet.)
capture frame drop prepdta2
frame create prepdta2
frame change prepdta2


// Load the taxpayer data, keep the variables used below, remove exact
// duplicate rows and rows without a municipality number.
use "$mypathRR/Datasets/ESTV/allcant_taxpayers_coll_B.dta", clear
keep snp_gdenr year control n_taxpayers_tot n_taxpayers_ctr reink_ctr   reink_tot  cant
duplicates drop
drop if snp_gdenr == .


// correct the outliers
	* winsorize observations with a high reink
	// winsor2 is a user-written command; suffix(_w) stores the result in new
	// variables reink_ctr_w and reink_tot_w, with cut points taken within year.
	// Upper cut: 99th percentile for reink_ctr (rows with control == 1 only),
	// 98th percentile for reink_tot. The lower cut of 0 leaves the bottom as is.
	winsor2 reink_ctr if control == 1, cuts(0 99) suffix(_w) by(year)
	winsor2 reink_tot , cuts(0 98) suffix(_w) by(year)
	
// Split the data into group-specific extracts and merge them back side by
// side, so that the result has one row per municipality (snp_gdenr) and year.
tempfile control treated rich

// Extract 1: rows with control != 1. The wildcard rename turns the suffix
// _ctr into _ctrl; reink_ctr_w does not end in _ctr and keeps its name.
preserve
	keep if control != 1
	keep snp_gdenr year n_taxpayers_ctr reink_ctr reink_ctr_w
	rename *_ctr *_ctrl
	save "`control'"
restore

// Extracts 2 and 3 both start from the rows with control != 0.
preserve
	keep if control != 0
	keep snp_gdenr year n_taxpayers_ctr reink_ctr reink_ctr_w
	save "`treated'"

	// get number of rich: the same taxpayer count under the name n200K
	keep snp_gdenr year n_taxpayers_ctr
	rename n_taxpayers_ctr n200K
	label var n200K "Number of high-income taxpayers"
	save "`rich'"
restore


// Base rows: control == 0, stripped of the group-specific columns, which
// are re-attached from the extracts. Only the first merge creates _merge.
keep if control == 0
drop n_taxpayers_ctr reink_ctr reink_ctr_w control
merge 1:1 snp_gdenr year using "`treated'"
merge 1:1 snp_gdenr year using "`rich'", nogenerate
merge 1:1 snp_gdenr year using "`control'", nogenerate


// Municipality-years without a match in the treated extract (_merge == 1)
// get zeros instead of missing values.
foreach v in n_taxpayers_ctr reink_ctr reink_ctr_w n200K {
	replace `v' = 0 if `v' == . & _merge == 1
}
drop _merge

rename snp_gdenr gemeinde
sort gemeinde year


// Attach municipality names: the labels do-file is run quietly and the
// value label GEMEINDE is then assigned to the municipality identifier.
keep if gemeinde != .
run "$mypathRR/Resources/labels-municipalities.do"
label values gemeinde GEMEINDE

// String copy of the municipality names (sdecode is a user-written command)
sdecode gemeinde, generate(Gemeinde)

// Declare the municipality-year panel
xtset gemeinde year

// Population weights get the relative importance of each canton right:
// compare the taxpayer-weighted with the unweighted canton distribution.
tabulate cant [fweight = n_taxpayers_tot]
tabulate cant


// Generate and label the dependent variables

// Group totals: per-capita income times the matching taxpayer count.
// These are computed before the income variables are rescaled below.
gen reink_sum = reink_tot * n_taxpayers_tot
gen reink_sum_ctr = reink_ctr * n_taxpayers_ctr
gen reink_sum_ctr_w = reink_ctr_w * n_taxpayers_ctr
gen reink_sum_ctrl = reink_ctrl * n_taxpayers_ctrl

rename reink_tot reink

// Rescale every income variable by 1/10, give it the generic label (the
// group-specific variables are relabelled further down) and create its log.
foreach inc in reink reink_ctr reink_ctr_w reink_ctrl {
	replace `inc' = `inc'/10
	label var `inc' "Net income p.c. (in 1000 CHF)"
	gen ln_`inc' = ln(`inc')
}

// Share and log measures of high-income taxpayers
gen share200K = n200K/n_taxpayers_tot*100
gen ln_n200K = ln(n200K)
gen ln_share200K = ln(share200K)

label var share200K "Share of high-income taxpayers (in %)"
label var ln_n200K "Log number of high-income taxpayers"
label var ln_share200K "Log share of high-income taxpayers (in %)"


// Remaining variable labels
label var n_taxpayers_tot "Total number of taxpayers"
label var n_taxpayers_ctr "Number of high-income taxpayers"
label var n_taxpayers_ctrl "Number of other taxpayers "

label var ln_reink "Log of net income p.c. (in 1000 CHF)"

label var reink_ctr "Net income p.c. of high-income taxpayers (in 1000 CHF)"
label var ln_reink_ctr "Log of net income p.c. of high-income taxpayers (in 1000 CHF)"
label var reink_ctr_w "Net income p.c. of high-income taxpayers (in 1000 CHF) [top 1% winsorized]"
label var ln_reink_ctr_w "Log of net income p.c. of high-income taxpayers (in 1000 CHF) [top 1% winsorized]"

label var reink_ctrl "Net income p.c. of other taxpayers (in 1000 CHF)"
label var ln_reink_ctrl "Log of net income p.c. of other taxpayers (in 1000 CHF)"


// Difference-in-differences building blocks: canton 6 is the treated
// canton, the post period is every non-missing year after 2005.
gen treated = (cant == 6)
gen period = (year > 2005 & year!=.)
gen Interaction = treated*period

label var treated "Treated canton"
label var period "Period $ t>2005$"
label var Interaction "DiD"


// Event-study dummies before treatment: pre1 stands for 2005, pre35 for 1971.
// Each dummy is 1 only for the treated canton in that calendar year.
forvalues lag = 1/35 {
	local yr = 2006 - `lag'
	gen pre`lag' = (treated == 1 & year == `yr')
	label var pre`lag' "`yr' (treatment lag `lag')"
}
replace pre1 = 0 // make 2005 the reference year


// Remove the dummies for years that do not exist in the tax data and / or
// for which no tax data exists for OW
// (it is important for event study estimation to have a balanced sample!)
foreach lag of numlist 6 7 8 10(2)20 21 22(2)34 {
	drop pre`lag'
}


// Event-study dummies after treatment: post1 stands for 2006, post11 for 2016
forvalues lead = 1/11 {
	local yr = 2005 + `lead'
	gen post`lead' = (treated == 1 & year == `yr')
	label var post`lead' "`yr' (treatment lead `lead')"
}

// Canton dummies and canton-specific linear time trends
	tabulate cant, generate(cant_d)
	foreach dummy of varlist cant_d* {
		gen trend_`dummy' = year*`dummy'
		label var trend_`dummy' "Canton specific time trend"
	}
	gen trend = year


* * * * define the correct weight variable for each outcome * * * * 

// Helper frames left over from an earlier run in the same session are
// removed first; frame put aborts when the target frame already exists.
capture frame drop weights1
capture frame drop weights2

* get number of taxpayers in pre-treatment period
	// weights1: mean taxpayer counts over all years before 2006, rounded to
	// integers, one row per canton-municipality combination.
	// NOTE(review): the m:1 link below needs gemeinde to be unique in this
	// frame, which holds only if cant is constant within gemeinde - confirm.
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr cant year gemeinde , into(weights1)
	frame change weights1
		
		keep if year < 2006
		collapse n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr, by(cant gemeinde)
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr {
			replace `var' = round(`var')
			rename `var' `var'_pre2005
		}

	frame change prepdta2


	// weights2: taxpayer counts observed in the year 2005 only
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr cant year gemeinde , into(weights2)
	frame change weights2

		keep if year == 2005
		drop year
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr {
			rename `var' `var'_2005
		}
		
	frame change prepdta2

* link back to main frame
	frlink m:1 gemeinde, frame(weights1) gen(link_weights1)
	frget n_taxpayers_tot_pre2005 n_taxpayers_ctrl_pre2005 n_taxpayers_ctr_pre2005, from(link_weights1)
	// 499 missings, due to "new" municipalities appearing after 2005
	
	frlink m:1 gemeinde, frame(weights2) gen(link_weights2)
	frget n_taxpayers_tot_2005 n_taxpayers_ctrl_2005 n_taxpayers_ctr_2005, from(link_weights2)
	// 3,915  missings, due to mergers/splits of municipalities before and after 2005
	
	/*
	tab cant if n_taxpayers_tot_2005 == .
	// -> none of the treated municipalities in OW are affected.
	gen diff_tot_2005 = n_taxpayers_tot_2005 - n_taxpayers_tot
	gen diff_tot_pre2005 = n_taxpayers_tot_pre2005 - n_taxpayers_tot
	
	scatter diff_tot_2005 year if diff_tot_2005 !=0, name(diff_tot_2005, replace)
	scatter diff_tot_pre2005 year if diff_tot_pre2005 !=0, name(diff_tot_pre2005, replace)
	*/

* replace with actual taxpayers in pre-treatment period
	// For years before 2005 the weight is the count observed in that year;
	// from 2005 onwards it stays fixed at the 2005 count.
	foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr {
	replace `var'_2005 = `var' if year < 2005
	}

* generate a weight variable for each outcome
// Outcomes defined over all taxpayers use the total count ...
foreach outcome in n200K ln_n200K share200K ln_share200K reink ln_reink {
	gen weight_`outcome' = n_taxpayers_tot_2005
}

// ... outcomes of the other (non-high-income) taxpayers use the _ctrl count ...
foreach outcome in reink_ctrl  ln_reink_ctrl  {
	gen weight_`outcome' = n_taxpayers_ctrl_2005
}

// ... and outcomes of the high-income taxpayers use the _ctr count.
foreach outcome in reink_ctr  reink_ctr_w ln_reink_ctr ln_reink_ctr_w  {
	gen weight_`outcome' = n_taxpayers_ctr_2005
}

* * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *
*   TIME TREND ESTIMATION


// List of outcomes that get a canton trend and a residualized version.
// This is a global, so it stays defined after this do-file has finished.
global outcomes "n200K ln_n200K  share200K ln_share200K  reink ln_reink reink_ctr reink_ctr_w reink_ctrl ln_reink_ctr ln_reink_ctr_w ln_reink_ctrl "

* Estimate a time trend for each  outcome	 
// The working directory is still changed here, although this block no longer
// needs it, because code outside this block may rely on it.
cd "$mypathRR"
	
* estimate & predict canton trend
	// The canton-year predictions are handed over through a tempfile instead
	// of a scratch dataset in Datasets/ESTV: nothing is left behind if the
	// run aborts, and Stata removes the file by itself.
	tempfile trends200k
	preserve
		// Weighted regression on canton dummies and canton-specific linear
		// trends, fitted on 1995-2005 only; the linear prediction is then
		// computed for every observation, i.e. extrapolated to other years.
		foreach outcome in $outcomes  {
			reg `outcome' trend_* cant_d* if year < 2006 & year > 1994 [aweight = weight_`outcome']
			predict tr`outcome', xb
			label var tr`outcome' "predicted cantonal time trend in `outcome'"
		}
		
		// tr* also matches trend, trend_* and treated, which are dropped again.
		keep  cant year tr* 
		drop trend* treated
		// The prediction depends only on canton and year: keep one row each.
		bys cant year: keep if _n==1
	save "`trends200k'"
	restore 	 
	drop cant_d*

* match trend back to original data
	merge m:1 year cant using "`trends200k'", nogen keep(1 3)

	
* generate residualized outcomes
// Residual of a weighted regression of each outcome on its predicted trend
foreach outcome in $outcomes {
	reg `outcome' tr`outcome' [aweight = weight_`outcome']
	predict resid`outcome', residual
	label var resid`outcome' "residualized `outcome' (after taking out canton trend)"
}

// Leave the 200K frame; it is linked to the 300K frame later in this file.
frame change default


* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 
// Build the 300K panel in its own frame.
// A frame left over from an earlier run in the same session is dropped first;
// without this, frame create aborts because the name is already in use.
// The drop works here because the current frame is default at this point.
capture frame drop prepdta1
frame create prepdta1
frame change prepdta1

// Load the taxpayer data, keep the variables used below, remove exact
// duplicate rows and rows without a municipality number.
use "$mypathRR/Datasets/ESTV/allcant_taxpayers_coll_B.dta", clear
keep snp_gdenr year treat n_taxpayers_tot n_taxpayers_tr reink_tr stbetr_tr  reink_tot stbetr_tot cant
duplicates drop
drop if snp_gdenr == .

	* winsorize observations with a high reink
	// winsor2 is a user-written command; suffix(_w) stores the result in new
	// variables reink_tr_w and reink_tot_w, with cut points taken within year.
	// Upper cut: 99.9th percentile for reink_tr (rows with treat == 1 only),
	// 98th percentile for reink_tot. The lower cut of 0 leaves the bottom as is.
	winsor2 reink_tr if treat == 1, cuts(0 99.9) suffix(_w) by(year)
	winsor2 reink_tot , cuts(0 98) suffix(_w) by(year)

	
// Split the data into group-specific extracts and merge them back side by
// side, so that the result has one row per municipality (snp_gdenr) and year.
tempfile control treated rich

// Extract 1: rows with treat != 1. The wildcard rename turns the suffix
// _tr into _ctrl; reink_tr_w does not end in _tr and keeps its name.
preserve
	keep if treat != 1
	keep snp_gdenr year n_taxpayers_tr reink_tr reink_tr_w stbetr_tr
	rename *_tr *_ctrl
	save "`control'"
restore

// Extracts 2 and 3 both start from the rows with treat != 0.
preserve
	keep if treat != 0
	keep snp_gdenr year n_taxpayers_tr reink_tr reink_tr_w stbetr_tr
	save "`treated'"

	// get number of rich: the same taxpayer count under the name n300K
	keep snp_gdenr year n_taxpayers_tr
	rename n_taxpayers_tr n300K
	label var n300K "Number of rich"
	save "`rich'"
restore


// Base rows: treat == 0, stripped of the group-specific columns, which
// are re-attached from the extracts. Only the first merge creates _merge.
keep if treat == 0
drop n_taxpayers_tr reink_tr reink_tr_w stbetr_tr treat
merge 1:1 snp_gdenr year using "`treated'"
merge 1:1 snp_gdenr year using "`rich'", nogenerate
merge 1:1 snp_gdenr year using "`control'", nogenerate


// Municipality-years without a match in the treated extract (_merge == 1)
// get zeros instead of missing values.
foreach v in n_taxpayers_tr reink_tr reink_tr_w stbetr_tr n300K {
	replace `v' = 0 if `v' == . & _merge == 1
}
drop _merge

rename snp_gdenr gemeinde
sort gemeinde year


// Attach municipality names: the labels do-file is run quietly and the
// value label GEMEINDE is then assigned to the municipality identifier.
keep if gemeinde != .
run "$mypathRR/Resources/labels-municipalities.do"
label values gemeinde GEMEINDE

// String copy of the municipality names (sdecode is a user-written command)
sdecode gemeinde, generate(Gemeinde)


// Declare the municipality-year panel
xtset gemeinde year

tabulate cant [fweight = n_taxpayers_tot]

// Add the mover counts by municipality of origin; municipality-years
// without an entry count as zero movers.
merge 1:1 gemeinde year using "$mypathRR/Datasets/movers/movers_gemeinde_origin.dta", nogenerate keepusing(Movers_origin Movers_origin_cum)
foreach m of varlist Movers_origin Movers_origin_cum {
	replace `m' = 0 if `m' == .
}

// Same for the mover counts by municipality of destination. The zero fill of
// the origin counts is deliberately not repeated after this second merge.
merge 1:1 gemeinde year using "$mypathRR/Datasets/movers/movers_gemeinde_destination.dta", nogenerate keepusing(Movers_destin Movers_destin_cum)
foreach m of varlist Movers_destin Movers_destin_cum {
	replace `m' = 0 if `m' == .
}

// Corrected taxpayer counts: add back the cumulated movers by origin ...
foreach grp in tr tot {
	gen n_taxpayers_`grp'_corr = n_taxpayers_`grp' + Movers_origin_cum
}

// ... and, in the second variant, also subtract the cumulated movers by
// destination.
foreach grp in tr tot {
	gen n_taxpayers_`grp'_corr2 = n_taxpayers_`grp' + Movers_origin_cum - Movers_destin_cum
}




// Generate and label the dependent variables
* gen averages
// Rescale the income variables by 1/10. The generic label set here is
// overwritten further down for the group-specific variables.
foreach var in reink_tot reink_tot_w reink_tr reink_tr_w reink_ctrl {
	replace `var'=`var'/10
	label var `var' "Net income p.c. (in 1000 CHF)"
}
  
foreach var in stbetr_tot stbetr_tr stbetr_ctrl {
  label var `var' "Tax revenue p.c. (in CHF)"
} 

// Drop the _tot suffix from the all-taxpayer variables
rename stbetr_tot stbetr
rename reink_tot reink
rename reink_tot_w reink_w

* gen log incomes
foreach var in reink reink_w reink_tr reink_tr_w reink_ctrl stbetr stbetr_tr stbetr_ctrl {
	gen ln_`var' = ln(`var')
}

* gen share of rich taxpayers
gen share300K = n300K/n_taxpayers_tot*100
	label var share300K "Share of rich (in %)"

// Variants based on the mover-corrected taxpayer counts
gen share300Kc = n_taxpayers_tr_corr/n_taxpayers_tot_corr*100
	label var share300Kc "Share of rich (in %), corrected"
	
gen share300Kc2 = n_taxpayers_tr_corr2/n_taxpayers_tot_corr2*100
	label var share300Kc2 "Share of rich (in %), corrected for all movers"
	
gen ln_n300K = ln(n300K)
	label var ln_n300K "Log rich taxpayers"
	
gen ln_share300K = ln(share300K)
	label var ln_share300K "Log share of rich (in %)"

* gen labels
// The winsorization labels state the cut points actually used when the _w
// variables are created earlier in this file: the upper cut is the 98th
// percentile for reink_tot (top 2%) and the 99.9th percentile for reink_tr
// (top 0.1%). The labels previously said top 1% for both.
// NOTE(review): if 1% was the intended cut, change the winsor2 calls instead.
label var n_taxpayers_tot "Total number of taxpayers"

label var ln_reink "Log of net income p.c. (in 1000 CHF)"
label var ln_reink_w "Log of net income p.c. (in 1000 CHF) [top 2% winsorized]"
label var ln_stbetr "Log of tax revenue p.c. (in CHF)"
  
label var reink_tr "Net income p.c. of treated (in 1000 CHF)"
label var reink_tr_w "Net income p.c. of treated (in 1000 CHF) [top 0.1% winsorized]"
label var stbetr_tr "Tax revenue from treated p.c. (in CHF)"
label var ln_reink_tr "Log of net income p.c. of treated (in 1000 CHF)"
label var ln_reink_tr_w "Log of net income p.c. of treated (in 1000 CHF) [top 0.1% winsorized]"
label var ln_stbetr_tr "Log of tax revenue from treated p.c. (in CHF)"

label var reink_ctrl "Net income p.c. of non-treated (in 1000 CHF)"
label var stbetr_ctrl "Tax revenue from non-treated p.c. (in CHF)"
label var ln_reink_ctrl "Log of net income p.c. of non-treated (in 1000 CHF)"
label var ln_stbetr_ctrl "Log of tax revenue from non-treated p.c. (in CHF)"

label var n_taxpayers_tr "Number of treated taxpayers"
label var n_taxpayers_ctrl "Number of non-treated taxpayers"




// Difference-in-differences building blocks: canton 6 is the treated
// canton, the post period is every non-missing year after 2005.
gen treated = (cant == 6)
gen period = (year > 2005 & year!=.)
gen Interaction = treated*period

label var treated "Treated"
label var period "Period $ t>2005$"
label var Interaction "DiD"


// Event-study dummies before treatment: pre1 stands for 2005, pre35 for 1971.
// Each dummy is 1 only for the treated canton in that calendar year.
forvalues lag = 1/35 {
	local yr = 2006 - `lag'
	gen pre`lag' = (treated == 1 & year == `yr')
	label var pre`lag' "`yr' (treatment lag `lag')"
}
replace pre1 = 0 // make 2005 the reference year


// Remove the dummies for years that do not exist in the tax data and / or
// for which no tax data exists for OW
// (it is important for ES estimation to have a balanced sample!)
foreach lag of numlist 6 7 8 10(2)20 21 22(2)34 {
	drop pre`lag'
}


// Event-study dummies after treatment: post1 stands for 2006, post11 for 2016
forvalues lead = 1/11 {
	local yr = 2005 + `lead'
	gen post`lead' = (treated == 1 & year == `yr')
	label var post`lead' "`yr' (treatment lead `lead')"
}

// Canton dummies and canton-specific linear time trends
	tabulate cant, generate(cant_d)
	foreach dummy of varlist cant_d* {
		gen trend_`dummy' = year*`dummy'
		label var trend_`dummy' "Canton specific time trend"
	}
	gen trend = year


* * * * define the correct weight variable for each outcome * * * * 

// Helper frames left over from an earlier run in the same session are
// removed first; frame put aborts when the target frame already exists.
capture frame drop weights3
capture frame drop weights4

* get number of taxpayers in pre-treatment period
	// weights3: mean taxpayer counts over all years before 2006, rounded to
	// integers, one row per canton-municipality combination.
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr cant year gemeinde , into(weights3)
	frame change weights3
		
		keep if year < 2006
		collapse n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr, by(cant gemeinde)
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr {
			replace `var' = round(`var')
			rename `var' `var'_pre2005
		}
		// Report municipalities that appear more than once, then drop the
		// rows without a canton. The m:1 link below needs gemeinde to be
		// unique in this frame.
		duplicates tag gemeinde, gen(dups_w3)
		tab dups_w3
		drop if cant == . 
		drop dups_w3
		
	frame change prepdta1


	// weights4: taxpayer counts observed in the year 2005 only
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr cant year gemeinde , into(weights4)
	frame change weights4

		keep if year == 2005
		drop year
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr {
			rename `var' `var'_2005
		}
		// Same duplicate report and canton filter as for weights3
		duplicates tag gemeinde, gen(dups_w4)
		tab dups_w4
		drop if cant == . 
		drop dups_w4
		
	frame change prepdta1


* link back to main frame
	frlink m:1 gemeinde, frame(weights3) gen(link_weights3)
	frget n_taxpayers_tot_pre2005 n_taxpayers_ctrl_pre2005 n_taxpayers_tr_pre2005, from(link_weights3)
	// 619 missings, due to "new" municipalities appearing after 2005
	
	frlink m:1 gemeinde, frame(weights4) gen(link_weights4)
	frget n_taxpayers_tot_2005 n_taxpayers_ctrl_2005 n_taxpayers_tr_2005, from(link_weights4)
	// 4035  missings, due to mergers/splits of municipalities before and after 2005
	
	/*
	tab cant if n_taxpayers_tot_2005 == .
	// -> none of the treated municipalities in OW are affected.
	gen diff_tot_2005 = n_taxpayers_tot_2005 - n_taxpayers_tot
	gen diff_tot_pre2005 = n_taxpayers_tot_pre2005 - n_taxpayers_tot
	
	scatter diff_tot_2005 year if diff_tot_2005 !=0, name(diff_tot_2005, replace)
	scatter diff_tot_pre2005 year if diff_tot_pre2005 !=0, name(diff_tot_pre2005, replace)
	*/

* replace with actual taxpayers in pre-treatment period
	// For years before 2005 the weight is the count observed in that year;
	// from 2005 onwards it stays fixed at the 2005 count.
	foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr {
	replace `var'_2005 = `var' if year < 2005
	}
	

* gen outcome-specific weights
// Outcomes defined over all taxpayers use the total count ...
foreach outcome in n300K ln_n300K  share300K ln_share300K reink ln_reink reink_w ln_reink_w stbetr ln_stbetr {
	gen weight_`outcome' = n_taxpayers_tot_2005
}

// ... outcomes of the non-treated taxpayers use the _ctrl count ...
foreach outcome in reink_ctrl stbetr_ctrl  ln_reink_ctrl ln_stbetr_ctrl {
	gen weight_`outcome' = n_taxpayers_ctrl_2005
}

// ... and outcomes of the treated taxpayers use the _tr count.
foreach outcome in reink_tr reink_tr_w stbetr_tr ln_reink_tr ln_reink_tr_w ln_stbetr_tr {
	gen weight_`outcome' = n_taxpayers_tr_2005
}


// The mover-corrected shares are weighted by the matching corrected total
// count of the same year, not by the 2005 count.
gen weight_share300Kc = n_taxpayers_tot_corr
gen weight_share300Kc2 = n_taxpayers_tot_corr2

* * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *
*   TIME TREND ESTIMATION

// List of outcomes that get a canton trend and a residualized version.
// This is a global, so it stays defined after this do-file has finished.
global outcomes "n300K ln_n300K  share300Kc share300Kc2 share300K ln_share300K  reink ln_reink reink_tr  reink_w ln_reink_w reink_tr_w reink_ctrl ln_reink_tr ln_reink_tr_w ln_reink_ctrl   stbetr ln_stbetr stbetr_tr stbetr_ctrl ln_stbetr_tr ln_stbetr_ctrl"

* Estimate a time trend for each  outcome	 
// The working directory is still changed here, although this block no longer
// needs it, because code outside this block may rely on it.
cd "$mypathRR"
	
* estimate & predict canton trend
	// The canton-year predictions are handed over through a tempfile instead
	// of a scratch dataset in Datasets/ESTV: nothing is left behind if the
	// run aborts, and Stata removes the file by itself.
	tempfile trends300k
	preserve
		// Weighted regression on canton dummies and canton-specific linear
		// trends, fitted on 1995-2005 only; the linear prediction is then
		// computed for every observation, i.e. extrapolated to other years.
		foreach outcome in $outcomes  {
			reg `outcome' trend_* cant_d* if year < 2006 & year > 1994 [aweight = weight_`outcome']
			predict tr`outcome', xb
			label var tr`outcome' "predicted cantonal time trend in `outcome'"
		}
		
		// tr* also matches trend, trend_* and treated, which are dropped again.
		keep  cant year tr* 
		drop trend* treated
		// The prediction depends only on canton and year: keep one row each.
		bys cant year: keep if _n==1
	save "`trends300k'"
	restore 	 
	drop cant_d*

* match trend back to original data
	merge m:1 year cant using "`trends300k'", nogen keep(1 3)

	
* generate residualized outcomes
// Residual of a weighted regression of each outcome on its predicted trend
foreach outcome in $outcomes {
	reg `outcome' tr`outcome' [aweight = weight_`outcome']
	predict resid`outcome', residual
	label var resid`outcome' "residualized `outcome' (after taking out canton trend)"
}


* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *


*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *


// LINK THE TWO FRAMES
// Variables copied from the 200K frame (prepdta2) into the current frame:
// outcomes, weights, predicted trends and residualized outcomes.
local vars200K reink_ctr reink_ctr_w weight_reink_ctr weight_reink_ctr_w trreink_ctr trreink_ctr_w residreink_ctr residreink_ctr_w ///
	ln_reink_ctr ln_reink_ctr_w weight_ln_reink_ctr_w trln_reink_ctr trln_reink_ctr_w residln_reink_ctr residln_reink_ctr_w weight_ln_reink_ctr ///
	share200K ln_share200K weight_share200K weight_ln_share200K trshare200K trln_share200K residshare200K residln_share200K

// One-to-one link on municipality and year, then copy the variables over
frlink 1:1 year gemeinde, frame(prepdta2 year gemeinde) generate(lnkdta)
frget `vars200K', from(lnkdta)

// Missing values in the copied variables (e.g. rows without a match in
// prepdta2) are set to zero.
foreach v of local vars200K {
	replace `v' = 0 if `v' == .
}
